from langchain_community.document_loaders import TextLoader

# Load the Markdown file as plain text.
# The path uses forward slashes: "\l" inside a normal string literal is an
# invalid escape sequence (SyntaxWarning on Python 3.12+), and a backslash
# separator only resolves on Windows. Forward slashes work on every platform.
loader = TextLoader("datafile/loader.md", encoding="utf-8")
res = loader.load()
print(res)

from langchain_community.document_loaders import CSVLoader
# Load the CSV file; the "People" column is passed as the source column.
# Forward slashes replace the original backslash path, which relied on the
# invalid escape sequence "\l" and only resolved on Windows.
loader = CSVLoader(
    file_path="datafile/loader.csv",
    source_column="People",
    encoding="utf-8",
)
data = loader.load()
print(data)

# Visual separator between the CSV output and the loaders that follow.
print(">>>>>>>>>>>>>>>")
# pip install unstructured[xlsx]
# import nltk
# from nltk.tokenize import word_tokenize
# from nltk.corpus import stopwords
# from nltk.tag import pos_tag
# # nltk.download('punkt')
# # nltk.download('stopwords')
# nltk.download('averaged_perceptron_tagger')
#
# from langchain_community.document_loaders import DirectoryLoader
#
# loader = DirectoryLoader(path="datafile/example/", glob="*.xlsx")
# docs = loader.load()
# len(docs)
# print(docs)

# pip install beautifulsoup4 jq   (BSHTMLLoader needs beautifulsoup4; JSONLoader below needs jq)
from langchain_community.document_loaders import BSHTMLLoader
# Load the HTML file, opening it as UTF-8.
# Forward slashes replace ".\datafile\loader.html": "\d" and "\l" are invalid
# escape sequences (SyntaxWarning on Python 3.12+) and the backslash form only
# resolves on Windows.
loader = BSHTMLLoader("./datafile/loader.html", open_encoding="utf-8")
data = loader.load()
print(data)

from langchain_community.document_loaders import JSONLoader

# Load the JSON prompt file, selecting content with the jq expression
# ".template".
# Forward slashes replace ".\datafile\simple_prompt.json": "\d" and "\s" are
# invalid escape sequences (SyntaxWarning on Python 3.12+) and the backslash
# form only resolves on Windows.
loader = JSONLoader(
    file_path="./datafile/simple_prompt.json",
    jq_schema=".template",
    text_content=True,
)
data = loader.load()
print(data)

from langchain_community.document_loaders import PyPDFLoader
# Read the PDF and print the documents produced by load_and_split().
loader = PyPDFLoader(
    file_path="./datafile/loader.pdf",
)
data = loader.load_and_split()
print(data)